* Session settings for unattended (batch) execution
set more off          // never stop output at a -more- prompt
pause off             // makes every -pause- command in this file a no-op
set logtype text      // write log files as plain text rather than SMCL
* Note: the former "set mem 500M" line was removed. Stata 12 and later
* manage memory automatically and ignore that command, and this file
* already requires Stata 12+ because it uses -import excel-.

* Scratch dataset used to pass the filtered Census data between merge steps
tempfile currtemp

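*************** DESCRIPTION *************************************
* Loads Census concentration data and aggregates it to industry codes
*
*	Input: 		2.intermediate\CenCon_naics --> Census concentration
*				data by NAICS code (the file holds several NAICS
*				levels; only 3-character codes are used here)
*				1.user_inputs\NAICS2BEA.xlsx --> NAICS to BEA code map
*				6.Temp\levelkey --> BEA code to industry segment key
*	Output: 	2.intermediate\cencon_out --> sales-weighted
*				concentration ratios and HHI by indcode and year
*
*****************************************************************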

* Load the Census concentration data, keyed by the string variable naicsid
use 2.intermediate\CenCon_naics, clear

* Recode the sector-level codes to 3-character placeholders so that they
* pass the 3-character filter below.
* The variable is referenced by its full name, naicsid. Earlier versions
* wrote "naics" here, which only resolved through variable abbreviation
* and would break under "set varabbrev off" or if another variable
* starting with "naics" were ever added to the input file.
replace naicsid = "440" if naicsid == "44-45"
replace naicsid = "420" if naicsid == "42"
replace naicsid = "810" if naicsid == "81"

* Keep only 3-character codes (tested directly, no helper variable needed)
keep if strlen(naicsid) == 3

* Drop inconsistent industries and industries where higher level is chosen

* Convert to numeric so that range comparisons can be used
destring naicsid, replace

* Inconsistent industry: codes 513-519 are dropped for years before 2007
drop if inrange(naicsid,513,519) & year < 2007

* Sub-industries of the sectors recoded above to 440, 420 and 810 are
* dropped because the sector-level row is used instead
drop if inrange(naicsid,441,459) | inrange(naicsid,421,429) | inrange(naicsid,811,814)

* The merge key in the NAICS-to-BEA map is called naics
rename naicsid naics
save `currtemp', replace

* Attach BEA codes. The NAICS-to-BEA workbook is the master data (naics
* must be unique there) and the filtered Census extract is the using data
* (many rows per naics). Rows that appear only in the workbook are
* discarded; Census rows without a workbook match are retained.
import excel 1.user_inputs\NAICS2BEA.xlsx, firstrow clear
merge 1:m naics using `currtemp', keep(matched using) nogenerate
save `currtemp', replace

* Attach BEA segment information from the level key, retaining only
* rows whose beacode is present on both sides
merge m:1 beacode using 6.Temp\levelkey, keep(matched) nogenerate

* Industry identifier used for the aggregation that follows
generate indcode = ind_short

* Aggregate to desired industries: every concentration measure is replaced
* by its sales-weighted mean within each indcode-year cell
* (wtmean is a user-written egen function and must be installed)
local concvars a1_cenconc4 a1_cenconc8 a1_cenconc20 a1_cenconc50 hhi
foreach measure of local concvars {
	tempvar wavg
	egen `wavg' = wtmean(`measure'), weight(sales) by(indcode year)
	replace `measure' = `wavg'
	drop `wavg'
}

* Reduce to a single row per indcode-year cell
bysort indcode year: keep if _n == 1

* Give hhi the same a1_ prefix as the other measures, then keep only the
* identifiers and the a1_ variables
rename hhi a1_cenhhi
drop sales
keep indcode year a1*
order indcode year

compress
save 2.intermediate\cencon_out, replace

* Spot check run after the output has been saved: difference between the
* aggregated 8-firm ratio for Nondur_Paper in 2002 and the value 41.6
* (presumably a known reference figure -- TODO confirm its source)
generate test2 = (a1_cenconc8 - 41.6) if indcode == "Nondur_Paper" & year == 2002
summarize test2
drop test2
* Has an effect only if "pause on" is set earlier in the session
pause
